# Build-time setup: compile the Cython module `dcnn_inner` on first import via
# pyximport, with NumPy headers added to the include path.
import numpy
import pyximport

# reload_support allows the .pyx to be recompiled/reloaded without restarting.
pyximport.install(setup_args={"include_dirs":numpy.get_include()},
                  reload_support=True)

import timeit
import time

# The Cython implementations under benchmark (compiled on import, above).
import dcnn_inner

# Shared random input vector for the activation benchmarks, in the float
# dtype the Cython module was compiled for.
a = numpy.asarray(numpy.random.rand(50), dtype=dcnn_inner.FLOAT_TYPE)
# Number of timeit iterations per measurement.
loops = 100


def sigmoid_1():
    """Zero-argument wrapper around dcnn_inner.sigmoid_1 so timeit can call it."""
    result = dcnn_inner.sigmoid_1(a)
    return result


def sigmoid_2():
    """Zero-argument wrapper around dcnn_inner.sigmoid_2 so timeit can call it."""
    result = dcnn_inner.sigmoid_2(a)
    return result

def sigmoid_3():
    """Zero-argument wrapper around dcnn_inner.sigmoid_3 so timeit can call it."""
    result = dcnn_inner.sigmoid_3(a)
    return result


def benchmark_sigmoid():
    """Time sigmoid_2 (baseline) against sigmoid_1 and sigmoid_3.

    Prints the speedup ratios and the norm of the output differences as a
    numerical-agreement sanity check.
    """
    t1 = timeit.Timer("sigmoid_2()", "from __main__ import sigmoid_2").timeit(number=loops)
    t2 = timeit.Timer("sigmoid_1()", "from __main__ import sigmoid_1").timeit(number=loops)
    t3 = timeit.Timer("sigmoid_3()", "from __main__ import sigmoid_3").timeit(number=loops)
    # Parenthesized print works identically under Python 2 and Python 3
    # (the original bare print statements are a SyntaxError on Python 3).
    print('sigmoid_1 is %f times faster than sigmoid_2' % (t1 / t2))
    print('sigmoid_3 is %f times faster than sigmoid_2' % (t1 / t3))
    print(numpy.linalg.norm(sigmoid_1() - sigmoid_2()))
    print(numpy.linalg.norm(sigmoid_3() - sigmoid_2()))

def tanh_1():
    """Zero-argument wrapper around dcnn_inner.tanh_1 so timeit can call it."""
    result = dcnn_inner.tanh_1(a)
    return result

def tanh_2():
    """Zero-argument wrapper around dcnn_inner.tanh_2 so timeit can call it."""
    result = dcnn_inner.tanh_2(a)
    return result

def benchmark_tanh():
    """Time tanh_2 (baseline) against tanh_1 and report speedup + output diff."""
    t1 = timeit.Timer("tanh_2()", "from __main__ import tanh_2").timeit(number=loops)
    t2 = timeit.Timer("tanh_1()", "from __main__ import tanh_1").timeit(number=loops)
    # Parenthesized print works under both Python 2 and Python 3
    # (the original bare print statements are a SyntaxError on Python 3).
    print('tanh_1 is %f times faster than tanh_2' % (t1 / t2))
    print(numpy.linalg.norm(tanh_1() - tanh_2()))

def softmax_1():
    """Zero-argument wrapper around dcnn_inner.softmax_1 so timeit can call it."""
    result = dcnn_inner.softmax_1(a)
    return result


def softmax_2():
    """Zero-argument wrapper around dcnn_inner.softmax_2 so timeit can call it."""
    result = dcnn_inner.softmax_2(a)
    return result


def benchmark_softmax():
    """Time softmax_2 (baseline) against softmax_1 and verify they agree.

    Raises AssertionError if the two implementations diverge numerically.
    """
    t1 = timeit.Timer("softmax_2()", "from __main__ import softmax_2").timeit(number=loops)
    t2 = timeit.Timer("softmax_1()", "from __main__ import softmax_1").timeit(number=loops)
    # Parenthesized print works under both Python 2 and Python 3
    # (the original bare print statement is a SyntaxError on Python 3).
    print('softmax_1 is %f times faster than softmax_2' % (t1 / t2))
    # The original asserted exact float equality (norm == 0), which is fragile
    # if the implementations ever round differently; allow a tiny tolerance.
    diff = numpy.linalg.norm(softmax_1() - softmax_2())
    assert diff < 1e-6, 'softmax implementations diverge: %g' % diff


# Fixture for the input-layer backward-pass benchmarks: a random word-vector
# table plus a random gradient over 200 (possibly repeated) word indexes.
input_layer_backward_wordvec_dim = 50
input_layer_backward_vocab_size = 100000
input_layer_backward_decay = 1e-4
# 200 random word ids, clipped into [0, vocab_size - 1].
input_layer_backward_indexes = numpy.asarray(
    numpy.clip(numpy.random.rand(200) * input_layer_backward_vocab_size, 0, input_layer_backward_vocab_size - 1),
    dtype=numpy.uint32)
# Gradient w.r.t. the selected word vectors, shape (1, wordvec_dim, n_indexes).
input_layer_backward_delta = numpy.asarray(
    numpy.random.rand(input_layer_backward_wordvec_dim * len(input_layer_backward_indexes)), \
    dtype=dcnn_inner.FLOAT_TYPE).reshape((1, input_layer_backward_wordvec_dim, len(input_layer_backward_indexes)))
# Embedding matrix, shape (wordvec_dim, vocab_size).
input_layer_backward_W = numpy.asarray(
    numpy.random.rand(input_layer_backward_wordvec_dim * input_layer_backward_vocab_size), \
    dtype=dcnn_inner.FLOAT_TYPE).reshape((input_layer_backward_wordvec_dim, input_layer_backward_vocab_size))


def input_layer_backward_1():
    """Run dcnn_inner.input_layer_backward_1 on the shared fixture."""
    return dcnn_inner.input_layer_backward_1(
        input_layer_backward_delta, input_layer_backward_decay,
        input_layer_backward_W, input_layer_backward_indexes)


def input_layer_backward_2():
    """Run dcnn_inner.input_layer_backward_2 on the shared fixture."""
    return dcnn_inner.input_layer_backward_2(
        input_layer_backward_delta, input_layer_backward_decay,
        input_layer_backward_W, input_layer_backward_indexes)

def input_layer_backward_3():
    """Run dcnn_inner.input_layer_backward_3 on the shared fixture."""
    return dcnn_inner.input_layer_backward_3(
        input_layer_backward_delta, input_layer_backward_decay,
        input_layer_backward_W, input_layer_backward_indexes)


def benchmark_input_layer_backward():
    """Time input_layer_backward_2 (baseline) against variants 1 and 3.

    Prints speedup ratios and the norms of the output differences.
    """
    t1 = timeit.Timer("input_layer_backward_2()", "from __main__ import input_layer_backward_2").timeit(number=loops)
    t2 = timeit.Timer("input_layer_backward_1()", "from __main__ import input_layer_backward_1").timeit(number=loops)
    t3 = timeit.Timer("input_layer_backward_3()", "from __main__ import input_layer_backward_3").timeit(number=loops)
    # Parenthesized print works under both Python 2 and Python 3
    # (the original bare print statements are a SyntaxError on Python 3).
    print('input_layer_backward_1 is %f times faster than input_layer_backward_2' % (t1 / t2))
    print('input_layer_backward_3 is %f times faster than input_layer_backward_2' % (t1 / t3))
    print(numpy.linalg.norm(input_layer_backward_1() - input_layer_backward_2()))
    print(numpy.linalg.norm(input_layer_backward_3() - input_layer_backward_2()))


# Fixture for the 2-D wide-convolution benchmarks: a 100x100 random image
# and a length-5 1-D filter.
wide_convolution2d_input = numpy.asarray(
    numpy.random.rand(100 * 100),
    dtype=dcnn_inner.FLOAT_TYPE). \
    reshape((100, 100))
wide_convolution2d_filter = numpy.asarray(numpy.random.rand(5), dtype=dcnn_inner.FLOAT_TYPE)


def wide_convolution2d_1():
    """Run dcnn_inner.wide_convolution2d_1 on the shared image/filter fixture."""
    out = dcnn_inner.wide_convolution2d_1(wide_convolution2d_input, wide_convolution2d_filter)
    return out


def wide_convolution2d_2():
    """Run dcnn_inner.wide_convolution2d_2 on the shared image/filter fixture."""
    out = dcnn_inner.wide_convolution2d_2(wide_convolution2d_input, wide_convolution2d_filter)
    return out


def wide_convolution2d_3():
    """Run dcnn_inner.wide_convolution2d_3 on the shared image/filter fixture."""
    out = dcnn_inner.wide_convolution2d_3(wide_convolution2d_input, wide_convolution2d_filter)
    return out


def benchmark_wide_convolution2d():
    """Time the three wide-convolution kernels against baseline variant 2.

    Prints speedup ratios and the norms of the output differences.
    """
    t1 = timeit.Timer("wide_convolution2d_1()", "from __main__ import wide_convolution2d_1").timeit(number=loops)
    t2 = timeit.Timer("wide_convolution2d_2()", "from __main__ import wide_convolution2d_2").timeit(number=loops)
    # Variant 3 is slow, so it is timed with fewer iterations. The original
    # used loops/10 iterations but compared the raw t3 against t2 measured at
    # `loops` iterations, overstating variant 3 by 10x; normalise t3 to a
    # per-`loops` total. (`loops // 10` also keeps the count an int, which
    # timeit requires under Python 3.)
    n3 = max(loops // 10, 1)
    t3 = timeit.Timer("wide_convolution2d_3()", "from __main__ import wide_convolution2d_3").timeit(number=n3)
    t3 *= float(loops) / n3
    # Parenthesized print works under both Python 2 and Python 3.
    print('wide_convolution2d_1 is %f times faster than wide_convolution2d_2' % (t2 / t1))
    print('wide_convolution2d_3 is %f times faster than wide_convolution2d_2' % (t2 / t3))
    print(numpy.linalg.norm(wide_convolution2d_1() - wide_convolution2d_2()))
    print(numpy.linalg.norm(wide_convolution2d_3() - wide_convolution2d_2()))


# Fixture for the wide-convolution layer forward/backward benchmarks.
wide_convolution_layer_forward_n_feature_maps = 10
wide_convolution_layer_forward_n_feature_rows = 100
wide_convolution_layer_forward_n_feature_cols = 100
# Input feature maps: (n_feature_maps, rows, cols).
wide_convolution_layer_forward_input = numpy.random.rand(100000).astype(dcnn_inner.FLOAT_TYPE) \
    .reshape((wide_convolution_layer_forward_n_feature_maps,
              wide_convolution_layer_forward_n_feature_rows,
              wide_convolution_layer_forward_n_feature_cols))
wide_convolution_layer_forward_window_size = 5
wide_convolution_layer_forward_n_filters = 2
# Filter bank: (n_feature_maps, n_filters, 1, window_size).
wide_convolution_layer_forward_W = numpy.random.rand(wide_convolution_layer_forward_n_feature_maps * \
                                                     wide_convolution_layer_forward_n_filters * wide_convolution_layer_forward_window_size) \
    .astype(dcnn_inner.FLOAT_TYPE).reshape(wide_convolution_layer_forward_n_feature_maps,
                                           wide_convolution_layer_forward_n_filters, 1,
                                           wide_convolution_layer_forward_window_size)
# One bias per filter.
wide_convolution_layer_forward_b = numpy.random.rand(wide_convolution_layer_forward_n_filters).astype(
    dcnn_inner.FLOAT_TYPE)
# Upstream gradient for the backward pass; the last axis is widened by
# window_size - 1, matching a "wide" (full) convolution's output width.
wide_convolution_layer_backward_grad = numpy.random.rand(104000).astype(dcnn_inner.FLOAT_TYPE) \
    .reshape((wide_convolution_layer_forward_n_feature_maps,
              wide_convolution_layer_forward_n_feature_rows,
              wide_convolution_layer_forward_n_feature_cols+wide_convolution_layer_forward_window_size-1))

# Weight-decay coefficient used by the backward pass.
wide_convolution_layer_backward_decay = 1e-4


def wide_convolution_layer_forward_1():
    """Run dcnn_inner.wide_convolution_layer_forward_1 on the shared fixture."""
    return dcnn_inner.wide_convolution_layer_forward_1(
        wide_convolution_layer_forward_input,
        wide_convolution_layer_forward_window_size,
        wide_convolution_layer_forward_n_filters,
        wide_convolution_layer_forward_W,
        wide_convolution_layer_forward_b)


def wide_convolution_layer_forward_2():
    """Run dcnn_inner.wide_convolution_layer_forward_2 on the shared fixture."""
    return dcnn_inner.wide_convolution_layer_forward_2(
        wide_convolution_layer_forward_input,
        wide_convolution_layer_forward_window_size,
        wide_convolution_layer_forward_n_filters,
        wide_convolution_layer_forward_W,
        wide_convolution_layer_forward_b)

def wide_convolution_layer_forward_3():
    """Run dcnn_inner.wide_convolution_layer_forward_3 on the shared fixture."""
    return dcnn_inner.wide_convolution_layer_forward_3(
        wide_convolution_layer_forward_input,
        wide_convolution_layer_forward_window_size,
        wide_convolution_layer_forward_n_filters,
        wide_convolution_layer_forward_W,
        wide_convolution_layer_forward_b)


def benchmark_wide_convolution_layer_forward():
    """Time forward variants 1 and 3 against baseline variant 2.

    Prints speedup ratios and the norms of the output differences.
    """
    t1 = timeit.Timer("wide_convolution_layer_forward_1()", "from __main__ import wide_convolution_layer_forward_1").timeit(number=loops)
    t2 = timeit.Timer("wide_convolution_layer_forward_2()", "from __main__ import wide_convolution_layer_forward_2").timeit(number=loops)
    t3 = timeit.Timer("wide_convolution_layer_forward_3()", "from __main__ import wide_convolution_layer_forward_3").timeit(number=loops)
    # Parenthesized print works under both Python 2 and Python 3
    # (the original bare print statements are a SyntaxError on Python 3).
    print('wide_convolution_layer_forward_1 is %f times faster than wide_convolution_layer_forward_2' % (t2 / t1))
    print('wide_convolution_layer_forward_3 is %f times faster than wide_convolution_layer_forward_2' % (t2 / t3))
    print(numpy.linalg.norm(wide_convolution_layer_forward_1() - wide_convolution_layer_forward_2()))
    print(numpy.linalg.norm(wide_convolution_layer_forward_3() - wide_convolution_layer_forward_2()))

def wide_convolution_layer_backward_1():
    """Run dcnn_inner.wide_convolution_layer_backward_1 on the shared fixture."""
    return dcnn_inner.wide_convolution_layer_backward_1(
        wide_convolution_layer_forward_input,
        wide_convolution_layer_forward_W,
        wide_convolution_layer_forward_b,
        wide_convolution_layer_backward_grad,
        wide_convolution_layer_backward_decay)

def wide_convolution_layer_backward_2():
    """Run dcnn_inner.wide_convolution_layer_backward_2 on the shared fixture."""
    return dcnn_inner.wide_convolution_layer_backward_2(
        wide_convolution_layer_forward_input,
        wide_convolution_layer_forward_W,
        wide_convolution_layer_forward_b,
        wide_convolution_layer_backward_grad,
        wide_convolution_layer_backward_decay)

def benchmark_wide_convolution_layer_backward():
    """Time backward variant 1 against baseline variant 2.

    Prints the speedup ratio and the norms of the differences between the
    three returned arrays of each variant.
    """
    t1 = timeit.Timer("wide_convolution_layer_backward_1()", "from __main__ import wide_convolution_layer_backward_1").timeit(number=loops)
    t2 = timeit.Timer("wide_convolution_layer_backward_2()", "from __main__ import wide_convolution_layer_backward_2").timeit(number=loops)
    # Parenthesized print works under both Python 2 and Python 3
    # (the original bare print statements are a SyntaxError on Python 3).
    print('wide_convolution_layer_backward_1 is %f times faster than wide_convolution_layer_backward_2' % (t2 / t1))
    # Call each variant once instead of three times (the original re-ran both
    # functions for every compared element).
    r1 = wide_convolution_layer_backward_1()
    r2 = wide_convolution_layer_backward_2()
    for i in range(3):
        print(numpy.linalg.norm(r1[i] - r2[i]))

# Fixture for the k-max-pooling benchmarks: a 50x100 random image, k = 10,
# and one bias per row.
k_max_pooling_image_input_image = numpy.random.rand(5000).astype(dcnn_inner.FLOAT_TYPE).reshape((50, 100))
k_max_pooling_image_k = 10
k_max_pooling_image_b = numpy.random.rand(50).astype(dcnn_inner.FLOAT_TYPE)


def k_max_pooling_image_1():
    """Run dcnn_inner.k_max_pooling_image_1 on the shared fixture."""
    return dcnn_inner.k_max_pooling_image_1(
        k_max_pooling_image_input_image, k_max_pooling_image_k, k_max_pooling_image_b)


def k_max_pooling_image_2():
    """Run dcnn_inner.k_max_pooling_image_2 on the shared fixture."""
    return dcnn_inner.k_max_pooling_image_2(
        k_max_pooling_image_input_image, k_max_pooling_image_k, k_max_pooling_image_b)


def k_max_pooling_image_3():
    """Run dcnn_inner.k_max_pooling_image_3 on the shared fixture."""
    return dcnn_inner.k_max_pooling_image_3(
        k_max_pooling_image_input_image, k_max_pooling_image_k, k_max_pooling_image_b)


def benchmark_k_max_pooling_image():
    """Time k-max-pooling variants 1 and 3 against baseline variant 2.

    Prints speedup ratios and, for the first two returned arrays of each
    variant, the norms of the differences against the baseline.
    """
    t1 = timeit.Timer("k_max_pooling_image_1()", "from __main__ import k_max_pooling_image_1").timeit(number=loops)
    t2 = timeit.Timer("k_max_pooling_image_2()", "from __main__ import k_max_pooling_image_2").timeit(number=loops)
    t3 = timeit.Timer("k_max_pooling_image_3()", "from __main__ import k_max_pooling_image_3").timeit(number=loops)
    # Parenthesized print works under both Python 2 and Python 3
    # (the original bare print statements are a SyntaxError on Python 3).
    print('k_max_pooling_image_1 is %f times faster than k_max_pooling_image_2' % (t2 / t1))
    print('k_max_pooling_image_3 is %f times faster than k_max_pooling_image_2' % (t2 / t3))
    # Call each variant once instead of re-running it for every comparison.
    r1 = k_max_pooling_image_1()
    r2 = k_max_pooling_image_2()
    r3 = k_max_pooling_image_3()
    print(numpy.linalg.norm(r1[0] - r2[0]))
    print(numpy.linalg.norm(r1[1] - r2[1]))
    print(numpy.linalg.norm(r3[0] - r2[0]))
    print(numpy.linalg.norm(r3[1] - r2[1]))


# Fixture for the folding benchmarks: a 50x100 random image.
folding_image_input = numpy.random.rand(5000).astype(dcnn_inner.FLOAT_TYPE).reshape((50, 100))


def folding_image_1():
    """Run dcnn_inner.folding_image_1 on the shared image fixture."""
    result = dcnn_inner.folding_image_1(folding_image_input)
    return result


def folding_image_2():
    """Run dcnn_inner.folding_image_2 on the shared image fixture."""
    result = dcnn_inner.folding_image_2(folding_image_input)
    return result


def benchmark_folding_image_input():
    """Time folding variant 1 against baseline variant 2 and report agreement."""
    t1 = timeit.Timer("folding_image_1()", "from __main__ import folding_image_1").timeit(number=loops)
    t2 = timeit.Timer("folding_image_2()", "from __main__ import folding_image_2").timeit(number=loops)
    # Parenthesized print works under both Python 2 and Python 3
    # (the original bare print statements are a SyntaxError on Python 3).
    print('folding_image_input_1 is %f times faster than folding_image_input_2' % (t2 / t1))
    print(numpy.linalg.norm(folding_image_1() - folding_image_2()))


# Fixture for the k-max-pooling backward-pass benchmarks: upstream gradient
# (2, 100, 25), original input (2, 100, 100), and the column indexes chosen
# by the forward pass, clipped into [0, 99].
k_max_pooling_backward_grad = numpy.random.rand(5000).astype(dcnn_inner.FLOAT_TYPE).reshape(2, 100, 25)
k_max_pooling_backward_input_image = numpy.random.rand(20000).astype(dcnn_inner.FLOAT_TYPE).reshape(2, 100, 100)
k_max_pooling_backward_kmax_index = numpy.clip(numpy.random.rand(5000) * 100, 0, 99).astype(
    dcnn_inner.U_INT_TYPE).reshape(2, 100, 25)


def k_max_pooling_backward_1():
    """Run dcnn_inner.k_max_pooling_backward_1 on the shared fixture."""
    return dcnn_inner.k_max_pooling_backward_1(
        k_max_pooling_backward_grad,
        k_max_pooling_backward_input_image,
        k_max_pooling_backward_kmax_index)


def k_max_pooling_backward_2():
    """Run dcnn_inner.k_max_pooling_backward_2 on the shared fixture."""
    return dcnn_inner.k_max_pooling_backward_2(
        k_max_pooling_backward_grad,
        k_max_pooling_backward_input_image,
        k_max_pooling_backward_kmax_index)


def benchmark_k_max_pooling_backward():
    """Time k-max-pooling backward variant 1 against baseline variant 2.

    Note: the original name was misspelled "benchmard_...", so the
    `startswith('benchmark_')` discovery loop in __main__ never ran it.
    """
    t1 = timeit.Timer("k_max_pooling_backward_1()", "from __main__ import k_max_pooling_backward_1").timeit(number=loops)
    t2 = timeit.Timer("k_max_pooling_backward_2()", "from __main__ import k_max_pooling_backward_2").timeit(number=loops)
    # Parenthesized print works under both Python 2 and Python 3
    # (the original bare print statements are a SyntaxError on Python 3).
    print('k_max_pooling_backward_1 is %f times faster than k_max_pooling_backward_2' % (t2 / t1))
    # Call each variant once instead of twice.
    r1 = k_max_pooling_backward_1()
    r2 = k_max_pooling_backward_2()
    print(numpy.linalg.norm(r1[0] - r2[0]))
    print(numpy.linalg.norm(r1[1] - r2[1]))


# Backward-compatibility alias for the old misspelled name.
benchmard_k_max_pooling_backward = benchmark_k_max_pooling_backward


if __name__ == '__main__':
    # Discover and run every benchmark_* function defined in this module.
    # Iterating our own globals() avoids the original `import benchmarks`,
    # which re-imported this very file under a second module name and
    # re-executed all of the module-level fixture setup (and recompilation).
    # sorted() gives a deterministic run order; the list copy guards against
    # dict-size changes while iterating.
    for func_name, func in sorted(list(globals().items())):
        if func_name.startswith('benchmark_') and callable(func):
            func()